/*
This program assigns bank health (BH) measures to each location.
Percentile-based measures as well as weighted averages are created
for each date / location / entity-type combination. 
See the discussion below along with bhreadme.docx and the master do file for more info on these measures.
*/

/*

------SOME CONTEXT IS NEEDED TO UNDERSTAND VARIABLE NAMES AND CODE BELOW.					
**'ENTITY' VS 'BANK'
**Note: The terms 'bank' and 'entity' are used somewhat interchangeably, as both refer to a bank in our sample. 
		That said, 'entity' refers to a type of bank whose deposits exhibit a particular relationship to the deposits 
		present in a location as a whole. 
		These relationships are based on the degree to which a bank ('entity') is `dependent' on deposits from a given location,
		which we determine based on the so-called 'location-to-bank' deposity share. (More below.)
		We use the terms "LOCAL" and "MULTI-LOCATIONAL" (sometimes abbreviated MULTI) to distinguish one entity's 
		'location-dependency' from another's.
		
**More specifically, we split locations into two groups of entities:
	1) entities that fall ABOVE a certain THRESHOLD VALUE of the 'location-to-bank' deposit share - these are classified as LOCAL:
		-i.e., a LOCAL bank (entity) is one which has a large share of its deposits in a given location. 
		
	2) entities that fall BELOW a certain THRESHOLD VALUE of the 'location-to-bank' deposit share - these are classified as MULTI:
		-i.e., an entity defined as MULTI-LOCATIONAL is LESS dependent overall on it deposits in a given location (lower deposit share) 

** To group entities and their deposits inside a given location we further employ:
	1) Two relevant THRESHOLD VALUES of the 'location-to-bank' deposit share - 5% and 10%. 
		-i.e., one iteration of the code below distinguishes a LOCAL entity from a MULTI using a THRESHOLD VALUE of 5%. One iteration uses 10%.
	2) Grouping by bank-level-id (id_rssd|rssdid) as well as Bank Holding Company id (a bank's Top Holder, rssd9348). 
	
Within this context, we employ the following shorthand/abbreviations:

"L" - Denotes "LOCAL"
"M" - Denotes "MULTI-LOCATIONAL" (sometimes multi)		 
"05" - 5% THRESHOLD VALUE (for deposit share) used to determine local/multi banks.  Over 5==Local, Below==Multi
"10" - 10% THRESHOLD VALUE (for deposit share) used to determine local/multi banks. Over 10==Local, Below==Multi
"ent" - deposits grouped based on bank-level-id 
"bhc" - deposits grouped based on bhc-level-id 

For example:
"L05bhc" would denote:
	-LOCAL entities determined by a 5% threshold and grouped by their Top Holder's bhc-id (comments might refer to "LOCAL5 bhc")
		
"M10ent" would denote:
	-MULTI-LOCATIONAL entities determined by a 10% threshold and grouped by their bank-level-id (comments might refer to "MULTI10 ent)
	
------	
	
**VARIABLE NAMING CONVENTIONS - GENERAL
"_refix" - variables or filenames that are affected by the FFIEC 034 Form Filer issue (discussed in bhreadme.docx)
"ST" 	- State-Level
"MSA" 	- Metropolitan/Micropolitan Statistical Area
"nMSA" 	- Rural area within a state - i.e., NOT DEFINED BY AN MSA code
"renpl" - real estate non performing loans  			- (renpls / total loans)
"npl" 	-  non performing loans							- (npls / total loans)
"renpa" - real estate non performing asset ratio 		- (renpls / total assets)
"npa" 	-  non performing asset ratio					- (npls / total assets)


Other relevant abbreviations used below:
BH - Bank Health
CB - Commercial Bank
SSB - State Savings Bank
SOD - Summary of Deposits
Call - Call Report Data


For more info, see bhreadme.docx.
*/
*------------------------------------------------------------------------------------------

local out2020_dir <place your filepaths here>
cd "`out2020_dir'"

/*
NOTE: Inline comments in this do file are designed to provide context but be brief
See bhreadme.docx for broader information about the overall process of creating BH measures 
(Also, The authors thought it was important to maintain separate .dta files by location for data checking and analysis purposes)
*/


*The following code sets up relevant local variables used in this file. 

local refix_yesnos 	"Locations Locations_refix" //for looping through 'regular' locations, as well as locations subset for 034 filer fix - see bhreadme.docx if this is unfamiliar

local locTypes 		"MSA nMSA ST" 				//for looping through each location-type

local ren_rats 		"renR  	renaR2"				//organize renpl ratios
local npl_rats 		"nplR  	npaR2"				//organize npl ratios

local ratios 		"" 							//which ratios are looped over are set below, within the conditionals


foreach locType of local locTypes {


	foreach refix_yesno of local refix_yesnos {

		
		*-------------------------------------------------------------
		
		if "`refix_yesno'"=="Locations" {  /* if "Locations" appears in filename, file contains full data series and all banks from our sample - i.e., NOT REFIX subssample*/
		
			local ratios "`ren_rats' `npl_rats'"
			
			if "`locType'"=="MSA" 	{
				local IO_Index "a"
				local byLocVar "msabr" 		//sets by group at MSA-level (msabr) for MSA datasets 
			}
			
			if "`locType'"=="nMSA" 	{
				local IO_Index 	"b"
				local byLocVar	"stalpbr" 	//sets by group at State-level (stalpbr) for nMSA (rural) datasets 
			}
			
			if "`locType'"=="ST" 	{
				local IO_Index 	"c"
				local byLocVar	"stalpbr"	//sets by group at State-level (stalpbr) for State datasets 
			}
			
		}
		*-------------------------------------------------------------

	
	
		*The following set of locals outline how this program structure reads in and handles _refix datasets as opposed to the main time series data. 	
		*Note: Ultimately, we append the "_refix" datasets into the main data structure. This is done in the do file -*05a-*.
			
		if "`refix_yesno'"=="Locations_refix" {
			local ratios "`ren_rats'"	
			
			if "`locType'"=="MSA" 	{
				local IO_Index 	"d"
				local byLocVar "msabr" 		//sets by group at MSA-level (msabr) for MSA datasets 
			}
			if "`locType'"=="nMSA" 	{
				local IO_Index 	"e"
				local byLocVar	"stalpbr"	//sets by group at State-level (stalpbr) for nMSA (rural) datasets 
			}
			if "`locType'"=="ST" 	{
				local IO_Index 	"f"
				local byLocVar	"stalpbr" 	//sets by group at State-level (stalpbr) for ST State datasets 
			}

		}
		*-------------------------------------------------------------	
		local InPutFilename 	"bh-03`IO_Index'-`locType'-`refix_yesno'"
		local OutPutFilename 	"bh-04`IO_Index'-`locType'-`refix_yesno'-wavgsAndPercentiles"
		
		
		
		*<------------optional: uncomment to check file names are correct
		// di "By Group variable to be used: `byLocVar'"
		// di "InputFileName is: `InPutFilename'"
		// di "OutPutFilename is: `OutPutFilename'"
		// }  	//END LOCATION-TYPE LOOP
		// }		//END MAIN / RENPL FIX LOOP	
		
		

	
		use `InPutFilename',clear  //read in file representing all bank deposits in a given location type

		*====================LOCATION-BASED TOTAL DEPOSITS============================================================================================
		/*	
			**Note: The deposit summations calculated below (in addition to the total entity deposits variable [*_2200] created in the previous do file, [03a]) 
			are used as input for calculating the relevant deposit shares, i.e., 'bank-to-location' and 'location-to-bank'. 
			See bhreadme.docx for more detail.
		*/

		
		    bys dateq `byLocVar':   		egen double	DepTotin_`locType'		=total(brnchdep)  	//denominator for 'bank-to-location' deposit share as well as the LocShr* variables created below.
			bys dateq `byLocVar' ent:  		egen double	ent_tot_dep_`locType'	=total(brnchdep)  	//numerator for both 'bank-to-location' & 'location-to-bank' deposit share
			bys dateq `byLocVar' bhc:  		egen double	bhc_tot_dep_`locType'	=total(brnchdep) 	//numerator for both 'bank-to-location' & 'location-to-bank' deposit share, for BHC (top holder) measures
			

		
		
		*====================MAIN DEPOSIT SHARES - 'LOCATION-TO-BANK' & 'BANK-TO-LOCATION'======================================================================================================
		
			*#####  BANK-TO-LOCATION DEPOSIT SHARE 
			
			*(Entity Total Deposits in Location / Location Total Deposits)
			*Answers the Question: How important is entity (bank) to location?
			
			gen ent_2_`locType'		=(ent_tot_dep_`locType'/DepTotin_`locType')
			gen bhc_2_`locType'		=(bhc_tot_dep_`locType'/DepTotin_`locType')
			
			*#####  LOCATION-TO-BANK DEPOSIT SHARE 
			
			*(Entity Total Deposits in Location / Entity Total Deposits)
			*Answers the Question: How important is Location to entity (bank)?
							
			gen `locType'_2_ent		=(ent_tot_dep_`locType'/ent_2200)   //ent_2200 -an institution's total deposits, classified by bank-level-id. Created in do file *-03a*-
			gen `locType'_2_bhc		=(bhc_tot_dep_`locType'/bhc_2200) 	//bhc_2200 -an institution's total deposits, classified by bhc-level-id. Created in do file *-03a*-
		
		
		
		
		*====================USE LOCATION-TO-BANK SHARE TO GENERATE DUMMIES FOR MULTI/LOCAL DESIGNATION===============================================
		/*In this step, we generate a dummy that assigns banks to one of our eight 'entity-types':
		
			L05ent - Local at the 5% 'location-to-bank' threshold, using bank-level-id
			L10ent - Local at the 10% 'location-to-bank' threshold, using bank-level-id
			
			L05bhc - Local at the 5% 'location-to-bank' threshold, using BHC-level-id
			L10bhc - Local at the 10% 'location-to-bank' threshold, using BHC-level-id
			
			M05ent - Multi-Locational at the 5% 'location-to-bank' threshold, using bank-level-id
			M10ent - Multi-Locational at the 10% 'location-to-bank' threshold, using bank-level-id	
			
			M05bhc - Multi-Locational at the 5% 'location-to-bank' threshold, using BHC-level-id
			M10bhc - Multi-Locational at the 10% 'location-to-bank' threshold, using BHC-level-id
			
		*/
			local thrshlds "05 10"
			foreach t of local thrshlds {
			
					*LOCAL 
					g L`t'ent_`locType'_flag=1       if `locType'_2_ent>.`t'    //entity is "local" in location if location has more than `t'% of its total deposits  
					g L`t'bhc_`locType'_flag=1       if `locType'_2_bhc>.`t' 	//entity is "local" in location if  location has more than `t'% of its total deposits
					
					*MULTI-LOCATIONAL 
					g M`t'ent_`locType'_flag=1       if `locType'_2_ent<=.`t'    //entity is "multi-locational" in location if location has less than `t'% of its total deposits    
					g M`t'bhc_`locType'_flag=1       if `locType'_2_bhc<=.`t'    //entity is "multi-locational" in location if location has less than `t'% of its total deposits   
			}
		

		
		
		duplicates drop dateq `byLocVar' ent,force 
		
		
			
		*COUNT NUMBER OF BANK-LEVEL-IDS IN EACH LOCATION
		bys dateq `byLocVar': 	gen countof_ents_in_`locType'=_N	
		egen uniq_bhc = tag(dateq `byLocVar' bhc)
			
			
		
		
		*COUNT NUMBER OF BHC-LEVEL-IDS IN EACH LOCATION
			bys dateq `byLocVar': 	egen countof_bhcs_in_`locType'_ = total(uniq_bhc)								
			bys dateq `byLocVar': 	egen countof_bhcs_in_`locType' 	= max(countof_bhcs_in_`locType'_)			
			drop uniq_bhc countof_bhcs_in_`locType'_ 	

		
		
		**********  SUM TOTAL DEPOSITS BY 'ENTITY-TYPE', BY LOCATION **********************************************************************	
		*This step sums all deposits in a location held by LIKE-ENTITY-TYPES, e.g., all Local entities (defined, for instance, at the 5% threshold)
		
		local 				ent_typs "L05ent M05ent L10ent M10ent L05bhc M05bhc L10bhc M10bhc"
		foreach LM of local ent_typs {
			preserve
				collapse(sum) ent_tot_dep_`locType' if `LM'_`locType'_flag==1, by(dateq `byLocVar') 					
				ren ent_tot_dep_`locType' `LM's_`locType'_TotDep				
					*#############################
					tempfile 	 		`LM's_`locType'_TotDep	
					save			   ``LM's_`locType'_TotDep'					
					*#############################
			restore 

			
				
			*MERGE ENTITY-TYPE DEPOSIT TOTALS BY LOCATION-TYPE BACK INTO BANK-LEVEL FILE
			*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
			merge m:1 dateq `byLocVar' using ``LM's_`locType'_TotDep',assert(1 3) keep(1 3) nogen
			*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

			*------CREATE 'ENTITY-TO-LOCATION' DEPOSIT SHARES - FOR LOCAL / MULTI ENTITIES

				*Entity Total Deposits in Location / Total Deposits of like Entities in Location
				gen `LM'_2_`locType'			= ( ent_tot_dep_`locType'/`LM's_`locType'_TotDep )  if `LM'_`locType'_flag==1  

				*Note: We compare a given bank (which has already been classified
				*as either Local or Multi-Locational), to all other similarly classified 'entity-types' in that location. 			
								
				/* In this step, we determine if each location on an OVERALL basis is made of Local or Multi-Locational deposits  
					*Answers the Question:  
					What percentage of a location's deposits are held by entities classified as Local vs by entities classified as Multi-Locational.
						**Note: These shares will sum to 1, i.e., :
						[share for Local entities defined at the 5% threshold] + [share for Multi-Locational entities defined at the 5% threshold]==1
				*/
				
				gen LocShr`LM'_2_`locType'	= (`LM's_`locType'_TotDep/DepTotin_`locType')  //captures distribution of Local/Multi entities in a location
			
		} 	

		********************************************************************************			
		
		*GENERATE A COUNT of banks, BY LOCATION, FOR EACH ENTITY-TYPE (i.e., How many local? How many multi-locational?)
		
		*counts used to determine how local or multilocational a location is as well as for some robustness tests in the main analysis of the paper
		
		local 				ent_typs "L05ent M05ent L10ent M10ent L05bhc M05bhc L10bhc M10bhc"
		foreach LM of local ent_typs {
		
			preserve 
			
				keep if `LM'_`locType'_flag==1
				
				duplicates drop  dateq   `byLocVar'  ent,force							
				bys dateq   `byLocVar': gen countof_`LM's_in_`locType' = _N	
				
				duplicates drop dateq   `byLocVar',force
				keep dateq `byLocVar' countof_`LM's_in_`locType'	
				
				*#############################
				tempfile 	 		countof_`LM's_in_`locType'
				save 			   `countof_`LM's_in_`locType''							
				*#############################
				
			restore		
			
		}
		
		
		********************************************************************************	
		*merge entity-type counts created in previous step back in to bank-level data 
		local 				ent_typs "L05ent M05ent L10ent M10ent L05bhc M05bhc L10bhc M10bhc"
		foreach LM of local ent_typs {

				*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
				merge m:1 dateq `byLocVar' using `countof_`LM's_in_`locType'',assert(1 3) keep(1 3) nogen
				*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
				
		}

		
		
		*Optional DATA check *******************************************************************************	
		*Ensure the count of (Multi + Local) Entities in Location equal the Total Count of Banks in Location
		local 	ent_typs "ent bhc"
		foreach ent_typ of local ent_typs {
			local v_list 	countof_L05`ent_typ's_in_`locType' ///
							countof_L10`ent_typ's_in_`locType' ///
							countof_M05`ent_typ's_in_`locType' ///
							countof_M10`ent_typ's_in_`locType'
							
			foreach x of local v_list {
				replace `x'=0 if `x'==.
			}

			gen total_LM05`ent_typ'_count_check = countof_L05`ent_typ's_in_`locType' + countof_M05`ent_typ's_in_`locType'
			gen total_LM10`ent_typ'_count_check = countof_L10`ent_typ's_in_`locType' + countof_M10`ent_typ's_in_`locType'
			
		assert countof_ents_in_`locType'==total_LM10`ent_typ'_count_check,fast						//check compliments of threshold-based bank counts sum to total bank counts -i.e., above + below 5% should==total
		assert countof_ents_in_`locType'==total_LM05`ent_typ'_count_check,fast						//check compliments of threshold-based bank counts sum to total bank counts -i.e., above + below 5% should==total
		}
		********************************************************************************	
		compress, nocoalesce  //make data smaller where/if possible
		
		
		*===============================================================================
		* CALCULATE MAIN BH MEASURES
		*+++++++++++++++++++++++++++++++++++++++++++++
		
		* First, calculate the weighted average of bank health of a given location: (weights = entity-type's 'entity-to-location' deposit share)
		
		gen ent_`locType'_flag=1  			//adds dummy to facilitate loop that includes the 'regular' (full sample) bank observations together with Local/Multi

		local ent_typs "ent L05ent M05ent L10ent M10ent L05bhc M05bhc L10bhc M10bhc"  
		foreach LM_or_Reg of local ent_typs {	
			
			*ratios LOOP
			foreach ratio of local ratios {		
				bys dateq `byLocVar': gen   `LM_or_Reg'_2_`locType'_x_`ratio'		=(`ratio'*`LM_or_Reg'_2_`locType')  			//weight entity's balance sheet measure by their share of a given location's total deposits
			
				preserve
					collapse(sum) 		`LM_or_Reg'_2_`locType'_x_`ratio' if `LM_or_Reg'_`locType'_flag==1, by(dateq `byLocVar') 
					
					rename 				`LM_or_Reg'_2_`locType'_x_`ratio'    		`LM_or_Reg'_2_`locType'_x_`ratio'_WAVG  
					
					*#############################
					tempfile 	 		`LM_or_Reg'_2_`locType'_x_`ratio'_WAVG
					save			   ``LM_or_Reg'_2_`locType'_x_`ratio'_WAVG'										
					*#############################
					
				restore 
				
				*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
				merge m:1 dateq `byLocVar' using ``LM_or_Reg'_2_`locType'_x_`ratio'_WAVG',assert(1 3) keep(1 3) nogen  //merge weighted averages back into main dataset
				*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

			}
		}

		
		*===============================================================================
		*************PERCENTILE-BASED MEASURES - 10, 25, 50 ********************
		*===============================================================================
		/*
		In this step, we create our percentile-based BH measures. 
		In particular, we sort a location's entities by their balance sheet measure, e.g., renpls, starting from the least healthy and moving to the healthiest.
		We then cumulatively sum the relevant 'entity-to-location' deposit share, and capture the balance sheet measure of the entity that crosses each 
		percentile threshold. For example, if the entity with the highest (least healthy) renpl ratio in a location has more than 10% of the location's deposits,
		then that entity's renpl ratio will be assigned to the *p10 variable for that date-location observation.
		
		See bhreadme.docx for more info.
		*/

		foreach ratio of local ratios {		

			*Entity-Type loop
			local ent_typs "ent L05ent M05ent L10ent M10ent L05bhc M05bhc L10bhc M10bhc"
			
			foreach LM_or_Reg of local ent_typs {										
									
			*=========PERCENTILES
			
			preserve 
				
				keep if `LM_or_Reg'_`locType'_flag==1  				//keep only relevant entity-type ,e.g., Local, at the 5% location-to-bank threshold
				
				duplicates tag dateq `byLocVar' ent,gen(tag) 		//assert that observation is a unique date-location-entity 					
					assert tag==0
					drop tag
				
				keep dateq `byLocVar' `ratio'  `LM_or_Reg'_2_`locType' ent    		//all we need is the relevant ratio and the bnk_2_loc ratio for a given loop
				
						
				local sort_order "-"
				
				/*Since a bank's health is inversely related to its NPL ratio, if one were to include metrics that are positively correlated
				with bank health (e.g., equity ratio), one would set the sort_order local to missing, conditional on the type of balance sheet measure
				at hand, e.g., if "`ratio'"=="equityRatio" local sort_order "" 
				
				*/
				
				gsort 	dateq `byLocVar'  `sort_order'`ratio' ent 	 								//including bank id at end of sort - ensures some consistency when two banks might have same ratios
				
				by    	dateq `byLocVar':	gen counter`locType'`ratio'`LM_or_Reg'=_n  										//Note: no bysort here, just by. Counts the number of entities being cumulatively summed - useful for validation, as well as 
				
				by 		dateq `byLocVar': 	gen `LM_or_Reg'_2_`locType'_cusum_by`ratio'=sum(`LM_or_Reg'_2_`locType')  		//Note: no bysort here, just by. cumulative sum

				gsort 	dateq `byLocVar'  `sort_order'`ratio' ent 															//ensure sort

					local pcnts "10 25 50"	
					foreach pcnt of local pcnts {
					
						/*As entities cross each Deposit-Share-Percentile (10,25,50), assign that entity's health measure to the 
						relevant location's BH measure.
						An example, without locals, for clarity:		
							by dateq MSA: gen _renR_p10_by_M05bhc_2_MSA=renR[_n+1] 				/// 
							if 	M05bhc_2_MSA_cusum_byrenR<=.10 & 								///
								M05bhc_2_MSA_cusum_byrenR[_n+1]>.10
						*/
						
						by dateq `byLocVar': gen _`ratio'_p`pcnt'_by_`LM_or_Reg'_2_`locType'=`ratio'[_n+1] 		/// 
						if 	`LM_or_Reg'_2_`locType'_cusum_by`ratio'<=.`pcnt' & 								///
							`LM_or_Reg'_2_`locType'_cusum_by`ratio'[_n+1]>.`pcnt'	

									
						/*the next replace command catches instances where the first entity's deposit share crosses any of the percentiles:
						- i.e., if most unhealthy bank in location is more than 10% of the location's deposits
						then 'manually' assign its balance sheet measure as the relevant BH measure for the location */
						replace _`ratio'_p`pcnt'_by_`LM_or_Reg'_2_`locType'=`ratio' if 						///
						counter`locType'`ratio'`LM_or_Reg'==1 & 											///
						`LM_or_Reg'_2_`locType'_cusum_by`ratio'>=.`pcnt' 

				
						*assign percentile measure to whole location
						egen `ratio'_p`pcnt'_by_`LM_or_Reg'_2_`locType'=max(_`ratio'_p`pcnt'_by_`LM_or_Reg'_2_`locType'),by(dateq `byLocVar')
						drop _`ratio'_p`pcnt'_by_`LM_or_Reg'_2_`locType'

					}
				
				drop `ratio'  counter`locType'`ratio'`LM_or_Reg'
				
				
				duplicates drop dateq `byLocVar', force //all measures assigned - dropping bank-level data for tempfile
				
					*#############################
					tempfile `ratio'_p_by_`LM_or_Reg'_2_`locType'
					save 	``ratio'_p_by_`LM_or_Reg'_2_`locType''
					*#############################
					
					
			restore
				
								
				
				*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
				/*Merging location-level data back into bank-level data - again, for ease of later data validation.*/
				merge m:1 date `byLocVar' using ``ratio'_p_by_`LM_or_Reg'_2_`locType'',assert(1 3) keep(1 3) nogen
				*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

			}
		}



		*##################################
		save `OutPutFilename',replace
		*##################################

	
		
	}  	//END LOCATION-TYPE LOOP
}		//END MAIN / RENPL FIX LOOP	
*-------------------------------------------------------------









